county_fips = read_csv("./data/all-geocodes-v2016.csv", skip = 4) %>%
  janitor::clean_names() %>%
  rename(county_name = area_name_including_legal_statistical_area_description) %>%
  filter(county_code_fips != "000") %>%
  mutate(state_county_fips = paste0(state_code_fips, county_code_fips)) %>%
  select (state_code_fips, county_code_fips, state_county_fips, county_name)
## Parsed with column specification:
## cols(
##   `Summary Level` = col_character(),
##   `State Code (FIPS)` = col_character(),
##   `County Code (FIPS)` = col_character(),
##   `County Subdivision Code (FIPS)` = col_character(),
##   `Place Code (FIPS)` = col_character(),
##   `Consolidtated City Code (FIPS)` = col_character(),
##   `Area Name (including legal/statistical area description)` = col_character()
## )
state_fips = read_csv("./data/state-geocodes-v2016.csv", skip = 5) %>%
  janitor::clean_names() %>%
  filter(state_fips != "00") %>%
  select(state_fips, name) %>%
  mutate(state_fips = as.numeric(state_fips)) %>%
  rename(state = name)
## Parsed with column specification:
## cols(
##   Region = col_double(),
##   Division = col_double(),
##   `State (FIPS)` = col_character(),
##   Name = col_character()
## )
bee_county = read_csv("./data/bee_county/Bee_Colony_Census_Data_by_County 2.csv") %>% 
  janitor::clean_names() %>% 
  select(year, 
         state, 
         state_ansi, 
         ag_district, 
         ag_district_code, 
         county, 
         county_ansi, 
         value, 
         cv_percent) 
## Parsed with column specification:
## cols(
##   Year = col_double(),
##   Period = col_character(),
##   State = col_character(),
##   `State ANSI` = col_double(),
##   `Ag District` = col_character(),
##   `Ag District Code` = col_double(),
##   County = col_character(),
##   `County ANSI` = col_double(),
##   Value = col_character(),
##   `CV (%)` = col_character()
## )
 bee_county$state_ansi = stringr::str_pad(bee_county$state_ansi,2 , pad = "0")
bee_county$county_ansi = stringr::str_pad(bee_county$county_ansi,3 , pad = "0")

bee_county = bee_county%>% 
  mutate(state_county_fips = paste0(state_ansi, county_ansi),
    value = replace(value, value == "(D)", "NA"),
    cv_percent = replace(cv_percent, cv_percent == "(D)", "NA"),
    colony_count = value)
#create function to import data
file_name <- list.files(path = "./data/bee_state_2/") 

df = read_csv(file = str_c("./data/bee_state_2/", file_name[1])) %>%
  mutate(file = file_name[1])
## Parsed with column specification:
## cols(
##   `2` = col_double(),
##   t = col_character(),
##   `Honey: Released February 28, 2002, by the National Agricultural Statistics Service (NASS), Agricultural Statistics Board, U.S. Department of Agriculture.` = col_character(),
##   X4 = col_character(),
##   X5 = col_character(),
##   X6 = col_character(),
##   X7 = col_character(),
##   X8 = col_character(),
##   X9 = col_character()
## )
  df
## # A tibble: 51 x 10
##      `2` t     `Honey: Released … X4    X5    X6    X7    X8    X9    file 
##    <dbl> <chr> <chr>              <chr> <chr> <chr> <chr> <chr> <chr> <chr>
##  1     2 t     Honey:  Number of… <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  2002…
##  2     2 t     and Value by Stat… <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  2002…
##  3     2 h     <NA>               <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  2002…
##  4     2 h     <NA>               Honey Yield <NA>  <NA>  Aver… Value 2002…
##  5     2 h     State              Prod… per   Prod… Stoc… Pric… of    2002…
##  6     2 h     <NA>               Colo… Colo… <NA>  Dec … Poun… Prod… 2002…
##  7     2 h     <NA>               <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  2002…
##  8     2 u     <NA>               1,000 Poun… 1,00… 1,00… Cents 1,00… 2002…
##  9     2 d     AL                 16    78    1248  187   59    736   2002…
## 10     2 d     AZ                 40    59    2360  1322  73    1723  2002…
## # … with 41 more rows
my_read_csv = function(x){
    df = read_csv(x, skip = 9, col_names  = F)
}

my_read_csv = function(x){
    df = read_csv(x, skip = 9, col_names  = F)
}
  
bee_data = 
  tibble(
    file_names = file_name,
    path = str_c("./data/bee_state_2/", file_names)
  ) %>% 
  mutate(data = map(path, my_read_csv))%>% 
  unnest()
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
#clean data set
clean_bee_data =
  bee_data %>%
 separate(file_names, into = c("year", "remove"), sep = ".c") %>%
  select(-remove, -X1, -X2, -path) %>%
  rename(state = X3, honey_producing_colonies = X4, yield_per_colony = X5, production = X6, stocks = X7, price_per_pound = X8, production_value = X9) %>%
  drop_na(state) %>%
  mutate(state = recode(state, AL = "Alabama", AR = "Arkansas", AZ = "Arizona", CA = "California", CO = "Colorado", FL = "Florida", GA = "Georgia", HI = "Hawaii", IA = "Iowa", IL = "Illinois", ID = "Idaho", IN = "Indiana", KS = "Kansas", KY = "Kentucky", LA = "Louisiana", ME = "Maine", MD = "Maryland", MI = "Michigan", MN = "Minnesota", MO = "Missouri", MT = "Montana", MS = "Mississippi", NC = "North Carolina", ND = "North Dakota", NE = "Nebraska", NJ = "New Jersey", NM = "New Mexico", NV = "Nevada", NY = "New York", OH = "Ohio", OK = "Oklahoma", OR = "Oregon", PA = "Pennsylvania", SC = "South Carolina", SD = "South Dakota", TN = "Tennessee", TX = "Texas", UT = "Utah", VA = "Virginia", VT = "Vermont", WA = "Washington", WV = "West Virginia", WI = "Wisconsin", WY = "Wyoming"  ))

clean_bee_data %>%
  ggplot(aes(x = year, y = honey_producing_colonies, color = state)) +
  geom_point()

pest_2002 = read_excel("./data/pesticides_csv/EPest.county.estimates.2002.xlsx") %>% 
  janitor::clean_names() %>% 
  mutate(
    state_fips = state_fips_code,
    county_fips = county_fips_code,
    state_county_fips = paste0(state_fips, county_fips),
    epest_low_kg = round(epest_low_kg),
    epest_high_kg = round(epest_high_kg)) %>%
  select(-state_fips_code, -county_fips_code)
    
pest_2002 %>% 
  group_by(year, state_county_fips, compound) %>% 
  ggplot(aes(x = compound, y = epest_high_kg)) + 
  geom_col() 

unique(pull(pest_2002, compound))
state_bee_fips = full_join(clean_bee_data, state_fips, by = "state")

top_pesticides = read_csv("./data/top_pesticides.csv") %>%
    mutate(state_fips = as.numeric(state_fips))
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   compound = col_character(),
##   year = col_double(),
##   epest_low_kg = col_double(),
##   epest_high_kg = col_double(),
##   state_fips = col_character(),
##   county_fips = col_character(),
##   state_county_fips = col_character()
## )
state_pest_data = 
  top_pesticides %>%
  group_by(state_fips, year, compound) %>%
  summarize(low = sum(epest_low_kg), high = sum(epest_high_kg)) 
state_bee_yrfips = 
  state_bee_fips %>%
  mutate(yearfips = paste0(year, state_fips))

state_pest_yrfips = 
  state_pest_data %>%
  mutate(yearfips = paste0(year, state_fips))

merged_state_data = full_join(state_bee_yrfips, state_pest_yrfips, by = "yearfips") %>%
  select(year.x, state, state_fips.x, honey_producing_colonies, yield_per_colony, production, compound, low, high) %>%
  drop_na(year.x, state, compound, high)
county_bee_yrfips = 
  bee_county %>%
  mutate(yearfips = paste0(year, state_county_fips))
         
county_pest_yrfips = 
  top_pesticides %>%
  mutate(state_county_fips = as.numeric(state_county_fips) ,
         yearfips = paste0(year, state_county_fips))

merged_county_data = full_join(county_bee_yrfips, county_pest_yrfips, by = "yearfips") %>%
  select(year.x, state, county, county_fips, colony_count, compound, epest_low_kg, epest_high_kg) %>%
  drop_na(year.x, state, county, colony_count, compound, epest_high_kg)

577258fd9b1fc9fa12e0acd2c93aaeec368444b1

corr_2004 = read_csv("./data/top_pesticides.csv") %>% 
  group_by(year, compound) %>% 
  summarise(tot_pest_high = sum(epest_high_kg)) %>% 
  filter(year == 2004) %>% 
  select(compound, tot_pest_high) %>%
  pivot_wider(
    names_from = "compound",
    values_from = "tot_pest_high",
  )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   compound = col_character(),
##   year = col_double(),
##   epest_low_kg = col_double(),
##   epest_high_kg = col_double(),
##   state_fips = col_character(),
##   county_fips = col_character(),
##   state_county_fips = col_character()
## )
## Adding missing grouping variables: `year`
cor(corr_2004, use = "everything", method = c("pearson"))
##                year CHLOROTHALONIL CHLORPYRIFOS CLOTHIANIDIN FIPRONIL
## year             NA             NA           NA           NA       NA
## CHLOROTHALONIL   NA             NA           NA           NA       NA
## CHLORPYRIFOS     NA             NA           NA           NA       NA
## CLOTHIANIDIN     NA             NA           NA           NA       NA
## FIPRONIL         NA             NA           NA           NA       NA
## IMIDACLOPRID     NA             NA           NA           NA       NA
## THIACLOPRID      NA             NA           NA           NA       NA
##                IMIDACLOPRID THIACLOPRID
## year                     NA          NA
## CHLOROTHALONIL           NA          NA
## CHLORPYRIFOS             NA          NA
## CLOTHIANIDIN             NA          NA
## FIPRONIL                 NA          NA
## IMIDACLOPRID             NA          NA
## THIACLOPRID              NA          NA